*************************************************************   
* do-file to create variables from HIS Graduate Panel Study	*
* fo 16.12.2015												*
*************************************************************

* --- session setup ---
clear all
version 14
set more off

* project folder; every later path is relative to it
cd "E:\Forschung_offline\02_Bobbitt-Zeher\"
use "Data\ZA4272.dta", clear

* declare X1gewinsg as the sampling (probability) weight for svy commands
svyset [pweight=X1gewinsg]

* value labels shared by the binary indicators created below
label define noyes    0 "nein" 1 "ja"
label define noneuyes 0 "nein oder neutral" 1 "ja"

*** dependent variable: annual income ***
* C2Jeinkom is treated as a monthly amount (it is multiplied by 12 for the
* annual figure); the codes -2 and -1 are set to missing first.
recode C2Jeinkom (-2 -1 = .), generate(inc_mon)
generate inc_year    = inc_mon * 12
generate ln_inc_mon  = ln(inc_mon)
generate ln_inc_year = ln(inc_year)

*** independent variables ***
* gender: K1geschl code 1 -> 0 (male), code 2 -> 1 (female), -1 -> missing *
recode K1geschl (-1 = .) (1 = 0) (2 = 1), generate(frau)
label variable frau "gender"
label define frau 0 "male" 1 "female"
label values frau frau
* cases without a valid gender are removed from the data
drop if frau == .
* mann is frau with 0 and 1 swapped
recode frau (0 = 1) (1 = 0), generate(mann)

* ses of family of origin *
* father worker: K1bervat code 4 -> 1, codes 1 2 3 5 -> 0, codes -1 and 7 -> missing.
* NOTE(review): any other code (e.g. 6) is passed through unchanged by recode
* and would enter parses as is - confirm that no such code occurs in the data.
recode K1bervat (-1 7 = .) (4 = 1) (1 2 3 5 = 0), generate(arbeitervat)
label variable arbeitervat "father worker"
label values arbeitervat noyes

* college degree of father (vat) and mother (mut):
* codes 1 2 -> 1, codes 3 to 6 -> 0, codes -1 and 7 -> missing
foreach elter in vat mut {
	recode K1berab`elter' (-1 7 = .) (1 2 = 1) (3/6 = 0), generate(hs`elter')
	label values hs`elter' noyes
}
label variable hsvat "father has college degree"
label variable hsmut "mother has college degree"

* additive index: +1 per parent with a college degree, -1 if the father is a worker;
* it is missing as soon as one component is missing
generate parses = hsvat + hsmut - arbeitervat
label variable parses "parent SES"
drop hsvat hsmut arbeitervat

* importance of having lots of money *
* the item is reversed: 1 -> 4 ... 5 -> 0, so a higher value means more important
recode C1zigeld (-1 = .) (1 = 4) (2 = 3) (3 = 2) (4 = 1) (5 = 0), generate(zigeld)
label variable zigeld "Ziel sehr gut verdienen"

* family characteristics *
* children: code 1 -> 1, code 2 -> 0; plus interaction with gender
recode K2kinder (-1 = .) (1 = 1) (2 = 0), generate(kinder)
label variable kinder "Kinder"
label values kinder noyes
generate kinderxfrau = kinder * frau

* married: code 3 -> 1, codes 1 2 -> 0; plus interaction with gender
recode K2famsta (-1 = .) (1 2 = 0) (3 = 1), generate(verhei)
label variable verhei "Verheiratet"
label values verhei noyes
generate verheixfrau = verhei * frau

* hours worked per month *
* weekly hours times four serve as the monthly figure
recode C2wazhau (-1 = .), generate(arbzt_woch)
generate arbzt_mon   = arbzt_woch * 4
generate lnarbzt_mon = ln(arbzt_mon)

* sector *
* code 2 of C2Joeffdi -> 1, code 1 -> 0; -2 and -1 -> missing
recode C2Joeffdi (-2 -1 = .) (1 = 0) (2 = 1), generate(privat)

* industry *
* Collapse the detailed industry codes of C2Jbranche into broad groups.
* -1 and -2 become missing. The target codes used by the rules are
* 1 2 3 5 7 8 9 10 11 12 13 14 15 17 18 (15 groups); code 16 is labelled
* below but is not produced by any rule.
recode C2Jbranche (-1 -2=.)(1=1)(2=5)(3 4 5 6=3)(7=2)(8=3)(9=7)(10 11=8)(12=15) ///
	(13=14)(14 15 16=11)(17=9)(18 19=10)(20 21=13)(22=18)(23 24 25 26=17)(27=10) ///
	(28=13)(29=18)(30=12)(31=18), gen(branche)

* value labels (spelling of label 1 corrected: "agriculture")
lab def branche 1"agriculture, forestry, horticulture occs" ///
	2"Construction and allied" ///
	3"Manufacturing" ///
	5"Utilities" ///
	7"Retail and wholesale" ///
	8"Finance, insurance, real estate" ///
	9"Business, personal services" ///
	10"Entertainment, recreation" ///
	11"Professional services" ///
	12"Public administration, safety, military" ///
	13"Health care, social services" ///
	14"Communications" ///
	15"Transportation" ///
	16"Hospitality" ///
	17"Education" 18"Other services"
lab val branche branche	

* occupation *
* Collapse the detailed occupation codes of C2beruf into coarse classes;
* -1 and -2 become missing. Target codes used by the rules:
* 1 2 3 4 5 22 24 27 28 29 30 31 33.
* NOTE(review): the ranges 1210/1359 and 1310/1539 overlap. The rule order
*   therefore matters (presumably the earlier rule wins, so 1310-1359 end up
*   in class 4) - confirm the intended boundary.
* NOTE(review): codes that fall into the gaps between the ranges are not
*   touched by recode and keep their original value - confirm none occur.
* NOTE(review): no rule produces class 25, although a later step of this
*   file merges class 25 into 24 - confirm whether one of the two rules
*   mapping to 24 was meant to map to 25.
recode C2beruf (-1 -2=.) ///
	  (110/629= 1) ///
	  (700/807= 2) ///
	(1010/1129= 3) ///
	(1210/1359= 4) ///
	(1310/1539= 5) ///
	(1610/5069=33) ///
	(6000/6129=22) ///
	(6200/6529=22) ///
	(6600/6899=24) ///
	(6910/7064=24) ///
	(7110/7449=33) ///
	(7501/7899=27) ///
	(7910/8145=28) ///
	(8211/8399=29) ///
	(8410/8599=30) ///
	(8610/8944=31) ///
	(9010/9379=33) ///
	(9711/9971=33), gen(beruf)

* value labels exist for class 1 and classes 22-33 only; classes 2-5 stay unlabeled
lab def beruf 1"Ia" ///
	22 "IVab - Engineers and Technicians" ///
	24 "Vab - merchants" ///
	27 "Vd - managerial, administrative" ///
	28 "Ve - protective service, criminal justice, military" ///
	29 "Vf - writers, publishers, artists, designers" ///
	30 "Vg - medical services" ///
	31 "Vh - social service, education" ///
	33 "Other"

lab val beruf beruf
* prefix every label text with its numeric code
numlabel beruf, add

* job training *
* E2wbfno is flipped: 1 -> 0 and 0 -> 1, -1 -> missing
recode E2wbfno (-1 = .) (1 = 0) (0 = 1), generate(jobtrain)

* job autonomy *
* three items are reversed (1..5 -> 4..0); C2apvorde keeps its direction (1..5 -> 0..4)
local rev "(-1 -2 = .) (1 = 4) (2 = 3) (3 = 2) (4 = 1) (5 = 0)"
recode C2apeigen `rev', generate(auton1)
recode C2apfinan `rev', generate(auton3)
recode C2apvorde (-1 -2 = .) (1 = 0) (2 = 1) (3 = 2) (4 = 3) (5 = 4), generate(auton4)
recode C2apselb  `rev', generate(auton5)

* sum of the four items (0..16) rescaled to the interval [0;3]
generate autonom = (auton1 + auton3 + auton4 + auton5) / (16/3)

* report the reliability of the four items, then discard them
alpha auton1 auton3 auton4 auton5
drop auton1 auton3 auton4 auton5

* standardized Test Scores; Grades *

* abitur / high school leaving certificate *
* Both grade variables are cleaned in place (-1 -> missing). The ddr grades
* 1 to 4 are additionally mapped to 10 to 40 so that both variables can be
* subtracted from 40.
recode K1abnotbrd (-1 = .)											/* for Western Germany */
recode K1abnotddr (-1 = .) (1 = 10) (2 = 20) (3 = 30) (4 = 40)		/* for Eastern Germany */

foreach reg in brd ddr {
	generate abinote`reg' = 40 - K1abnot`reg'				/* recode to higher is better grade */
	egen abinote`reg'_mean = mean(abinote`reg')				/* standardize */
	egen abinote`reg'_sd   = sd(abinote`reg')
	generate z_abinote`reg' = (abinote`reg' - abinote`reg'_mean) / abinote`reg'_sd
}

* one combined z-score: the ddr score is used whenever it is non-missing,
* otherwise the brd score (which may itself be missing)
generate z_abinote = cond(z_abinoteddr != ., z_abinoteddr, z_abinotebrd)

* remove the helpers
* NOTE(review): abinotebrd is kept while abinoteddr is dropped - confirm this is intended
drop abinotebrd_mean abinotebrd_sd z_abinotebrd abinoteddr abinoteddr_mean abinoteddr_sd z_abinoteddr

* College Leaving Certificate *
* clean the grade and the point score (-2 and -1 -> missing)
recode B1examnote (-2 -1 = .), generate(ab_note)
recode B1punkte   (-2 -1 = .), generate(ab_punkte)

* higher is better: 60 minus the grade, except for major code 28,
* where the point score is used directly
generate ab_note2 = cond(B1ber1ab1 == 28, ab_punkte, 60 - ab_note)

* z-standardize within major (B1ber1ab1)
bysort B1ber1ab1: egen abnot_mean = mean(ab_note2)
bysort B1ber1ab1: egen abnot_sd   = sd(ab_note2)
generate z_examen = (ab_note2 - abnot_mean) / abnot_sd

* drop the helper variables from ab_note through abnot_sd (range in dataset order)
drop ab_note-abnot_sd

* college major *
* cases with major code -1 are removed; fach is a copy of the major code and
* fachdum1, fachdum2, ... are one dummy per major
drop if B1ber1ab1 == -1
recode B1ber1ab1 (-1 = .), generate(fach)
quietly tabulate B1ber1ab1, generate(fachdum)

	* teaching track *
	recode B1absart1 (5/11 = 1) (1/4 12/16 = 0), generate(lehramt)

* broad field of study; the assignments run top to bottom, so a later line
* overwrites an earlier one (e.g. teaching track overrides groups 1 to 3)
generate fachgruppe = .
replace fachgruppe = 1 if inlist(B1ber1ab1, 29, 30)		/* Business, Econ. Sciences */
replace fachgruppe = 2 if inlist(B1ber1ab1, 31, 36, 37, 38, 39, 40, 41, 42, 43, 44) | ///
	inlist(B1ber1ab1, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69)	/* Math, Natural Science, Engineering */
replace fachgruppe = 3 if inlist(B1ber1ab1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) | ///
	inlist(B1ber1ab1, 23, 25, 26, 27)	/* Humanities and Social Science */
replace fachgruppe = 4 if inlist(B1ber1ab1, 16, 17) | lehramt == 1	/* Education */
replace fachgruppe = 5 if inlist(B1ber1ab1, 15, 22, 49, 50, 51)	/* Medical Science */
replace fachgruppe = 6 if inlist(B1ber1ab1, 74, 76, 77, 78)		/* Art */
replace fachgruppe = 7 if B1ber1ab1 == 28	/* Law */
replace fachgruppe = . if B1ber1ab1 == -1

label variable fachgruppe "Fachgruppe"
label define fachgruppe 1 "Business, Econ.Sci." 2 "Math, Nat.Sci. Engin." 3 "Soc.Sci, Humanities" ///
	4 "Education" 5 "Medical Science" 6 "Art" 7 "Law"
label values fachgruppe fachgruppe

* one dummy per field group, renamed from the running number to a short name
tabulate fachgruppe, generate(fachgr_)
local i = 0
foreach kurz in wiwi natur geist erzieh med kunst jura {
	local ++i
	rename fachgr_`i' fachgr_`kurz'
}

*** percentage female of major ***
* unweighted mean of frau within each major, computed on the data as they are at this point
bysort B1ber1ab1: egen fa_frau = mean(frau)

* highest degree *
* code 1 of D2promo -> 1; codes -2 2 3 4 -> 0; keine_promo is the mirror image
recode D2promo (-2 2 3 4 = 0) (1 = 1), generate(promo_fertig)
recode promo_fertig (1 = 0) (0 = 1), generate(keine_promo)
label variable promo_fertig "Abgeschlossene Promotion"

* institutional selectivity *
recode B1hsart (1 = 1) (2 = 0), generate(unifh)
label variable unifh "Uni oder FH"
label define unifh 0 "Uni" 1 "FH"
label values unifh unifh

* time studied (for descriptive statement in text) *
* semester code 1 -> month 4, code 2 -> month 10 /* month studying began */
recode B1serstim (1 = 4) (2 = 10), generate(anfmon)
generate start    = ym(B1jerstim, anfmon)
generate ende     = ym(B1jprende, B1moprende)
* duration in months (difference of two monthly dates)
generate studauer = ende - start

* employment
recode G2zterw (1 = 1) (2 = 0), generate(employ)

*********************
*** select sample ***
*********************
* The restrictions are applied one after another; later statistics (e.g. the
* standard deviation used for censoring) are computed on whatever sample the
* earlier steps have left.
keep if employ == 1

* age is computed as 2002 minus the year of birth; birth year -1 is set to missing first
recode K1gebjahr (-1=.)
gen age=2002-K1gebjahr
* NOTE(review): a missing age compares as larger than 34, so cases with an
* unknown year of birth are dropped by the next line as well - confirm intended.
drop if age > 34				/* no one who was older than 30 when graduating from college */
drop if inc_year == .			/* as in Bobbitt-Zeher */

* keep only cases whose C2wazneb equals -2
drop if C2wazneb!=-2			/* hours worked is unreliable for persons with multiple jobs */

* wage per hour = monthly income / monthly hours; cases with a missing wage are not dropped here
gen wage = inc_mon/arbzt_mon
drop if wage >=0 & wage <=2		/* as in Bobbitt-Zeher, less than 2 Euros per hr is an implausibly low wage */

* Upper income limit: four times the unweighted standard deviation of inc_year.
* egen writes the sd into the constant variable stdev, mean turns it into
* _b[stdev], and the limit is kept in the global macro censor. Both stdev and
* the global remain defined after this block.
egen stdev = sd(inc_year)
qui mean stdev
global censor = _b[stdev] * 4 
drop if inc_year > $censor		/* as in Bobbitt-Zeher */

drop if frau == . | kinder == . | verhei == .	/* don't want to impute that, 20 obs dropped */

* weighted mean duration of study among those working at least 35 hours per week
svyset[pw=X1gewinsg]
svy: mean studauer if arbzt_woch >= 35

* miss = number of missing values per case across the analysis variables
egen miss = rowmiss (inc_year z_abinote z_examen fachgr_wiwi fachgr_natur fachgr_geist ///
	fachgr_erzieh fachgr_med fachgr_kunst fachgr_jura fa_frau keine_promo promo_fertig unifh ///
	parses zigeld verhei kinder arbzt_woch privat ///
	beruf branche autonom jobtrain)
tab miss if arbzt_woch >= 35
	
	
************************************
*** imputation of missing values ***
************************************
sort id_suf

* impute industry: difficult for mlogit because many very small categories hence:
* assign mode industry according to field of study (3 obs) *
* For field groups 2 and 6: egen stores the most frequent industry among the
* cases with a known industry, mean makes it available as _b[mode], and the
* cases of that group with a missing industry receive this value.
foreach fg of numlist 2 6 {
	egen mode = mode(branche) 	if branche!=. & fachgruppe == `fg'
	mean mode
	replace branche = _b[mode]	if branche==. & fachgruppe == `fg'
	drop mode
}

* one dummy per industry, renamed from the running number to a short name;
* the list assumes that tabulate finds exactly these 15 categories in this order
quietly tabulate branche, generate(branche_dum)
local k = 0
foreach kurz in agric const manuf utili retail finance busserv entert profserv publicadm health communic transp educat other {
	local ++k
	rename branche_dum`k' ind_`kurz'
}

*	impute occupation: difficult for mlogit because many very small categories hence:	*
* 	assign to detailed occupation with probability equal to relative frequency of that
*	occupation among respondents with same field of study.	*
*
* Procedure for every field-of-study group (same order and same seeds as before):
*   1. svy: tab returns the weighted cell proportions in e(b) and the
*      occupation codes belonging to these cells in e(Row).
*   2. Every case receives a uniform random number.
*   3. The unit interval is cut into consecutive slices whose widths are the
*      cell proportions; a case with missing occupation in this group gets the
*      occupation code of the slice its random number falls into.
*
* The occupation code of each slice is read from e(Row) instead of being typed
* in by hand. The previous hand-typed lists contained code 25, which the recode
* that creates beruf never produces, so slices could be paired with the wrong
* occupation; reading the codes from the table itself rules this out.

local fgruppen	wiwi natur geist erzieh med kunst jura
local seeds		893658 99477 473456 20656 13009 726703 39756

tempname lo hi
local g = 0
foreach grp of local fgruppen {
	local ++g
	local seed : word `g' of `seeds'

	svy: tab beruf if fachgr_`grp'==1	/* get relative frequencies */
	mat fgr  = e(b)
	mat fcod = vec(e(Row))				/* occupation codes as a column vector */
	local ncat = colsof(fgr)
	* every proportion needs exactly one occupation code
	assert rowsof(fcod) == `ncat'

	set seed `seed'
	gen random1 = runiform()

	* walk through the slices from left to right, accumulating the proportions
	scalar `lo' = 0
	forvalues j = 1/`ncat' {
		scalar `hi' = `lo' + fgr[1,`j']
		replace beruf = fcod[`j',1] if beruf ==. & fachgr_`grp'==1 & random1 >= `lo' & random1 < `hi'
		scalar `lo' = `hi'
	}
	drop random1
}

recode beruf (25 = 24)  /* merge two types of merchants */

* one dummy per occupation class, renamed from the running number to a short name
* NOTE(review): the names assume that tabulate finds exactly nine classes in
* ascending order (1 22 24 27 28 29 30 31 33) - confirm against the output
tabulate beruf, generate(beruf)
local k = 0
foreach kurz in agricult engineer merchant manager security writers medical social other {
	local ++k
	rename beruf`k' beruf_`kurz'
}

* define upper and lower bounds for certain variables *
* Each bound is stored as a constant variable named <variable>_l (lower) or
* <variable>_u (upper).

* the two z-scores: observed minimum and maximum
foreach zvar in z_abinote z_examen {
	quietly summarize `zvar'
	generate `zvar'_l = r(min)
	generate `zvar'_u = r(max)
}

* the three scales: fixed limits
generate parses_l  = -1
generate parses_u  = 2
generate zigeld_l  = 0
generate zigeld_u  = 4
generate autonom_l = 0
generate autonom_u = 3

* Multiple imputation by chained equations.
* incomplete: variables with missing values that are to be imputed.
* complete:   variables registered as regular. Note that the ind_* industry
*             dummies are part of this list but are not among the predictors
*             of the mi impute command below.
local incomplete 	= "z_abinote z_examen autonom parses zigeld jobtrain privat" 
local complete 		= "kinder verhei arbzt_woch ind_agric ind_const ind_manuf ind_utili ind_retail ind_finance ind_busserv ind_entert ind_profserv ind_publicadm ind_health ind_communic ind_transp ind_educat ind_other fachgr_wiwi fachgr_natur fachgr_geist fachgr_erzieh fachgr_med fachgr_kunst fachgr_jura promo_fertig unifh fa_frau"

* fix the order of the cases before imputing, then declare the mi structure
* (wide style: imputed values are held in additional variables _1_<name>, _2_<name>, ...)
sort id_suf
mi set wide
mi register imputed `incomplete'
mi register regular `complete' frau X1gewinsg beruf branche
mi svyset [pw=X1gewinsg]

* Linear regression for the five metric variables, logit for the two binary
* ones; 30 imputations are added, the models are run separately by gender
* (by(frau)), and the random-number seed is fixed via rseed().
mi impute chained 	(regress) z_abinote z_examen autonom parses zigeld ///
					(logit) jobtrain privat ///
					= kinder verhei arbzt_woch fachgr_wiwi fachgr_natur fachgr_geist ///
					fachgr_erzieh fachgr_med fachgr_kunst fachgr_jura promo_fertig ///
					unifh fa_frau, by(frau) replace add(30) rseed(83749057)

* censor imputed values *
* Only the values of the first imputation (variables with the prefix _1_) are
* used: they are limited to the bounds stored in the _l and _u variables and
* then written into the gaps of the original variable.
foreach v in z_abinote z_examen parses zigeld autonom {
	display " "
	* imputed value below the lower bound: set it to the lower bound
	replace _1_`v' = `v'_l if `v' == . & _1_`v' < `v'_l
	* imputed value above the upper bound: set it to the upper bound
	replace _1_`v' = `v'_u if `v' == . & _1_`v' > `v'_u
	* use the imputed value wherever the original value is missing
	replace `v' = _1_`v' if `v' == .
}
	* the two binary variables take the imputed value without any limits
	replace jobtrain = _1_jobtrain if jobtrain == .
	replace privat   = _1_privat   if privat == .


* overview of the analysis variables after imputation
summarize inc_year z_abinote z_examen autonom parses zigeld jobtrain privat beruf branche

* numbered dummies beruf1, beruf2, ... and branche1, branche2, ...
* (in addition to the named dummies created earlier)
quietly tabulate beruf, generate(beruf)
quietly tabulate branche, generate(branche)

* write the prepared data, overwriting an existing file
save "Data\imputed.dta", replace

*** END OF DOFILE ***
